Directive 3
To assess whether any of the measured baseline variables were
marginally associated with differences in disease-free survival, for
each baseline variable of interest, we fit a Cox proportional hazards
model. Survival curves estimated using the Cox model were also
presented, where, for continuous random variables, separate curves were
included for the 25th and 75th quantile.
Schoenfeld residual plots were inspected for all Cox models and were
tested for an association with time. The proportional hazards assumption
was violated for the model including prophylactic use of methotrexate
and that including donor age. As such, based on the pattern of the
Schoenfeld residuals, we included a linear association with time for
donor age and a piecewise linear association with time for use of
methotrexate. The Schoenfeld residual plots are included in the
appendix.
Are any of the measured baseline variables associated with
differences in disease-free survival?
- We think this is referring to marginal (univariable)
associations
- Include cox models for all variables (assess proportionality of
hazards)
- Include kaplan meier curves for the categorical ones (with
confidence intervals)
Optional: for continuous covariates, get survival curves from the cox
models - could plot survival curve for specific values Think about
potentially adding a fully adjusted model Could create a DAG and outline
the causal assumptions that would be needed
tdfs: time until relapse, death or end of study (in
days) deltadfs: disease-free survival indicator 1 = dead or
relapsed 0 = alive and disease-free
patient age and sex, donor age and sex, patient and donor
cytomegalovirus (CMV) immune status, the wait time from diagnosis to
transplantation, disease group, French-American-British (FAB)
classification based on standard morphological criteria, and
prophylactic use of methotrexate to prevent aGVHD
#---------------------------
# Directive 3
#---------------------------
# column names of all baseline variables of interest
covariates <- c("age", "donorage",
"male", "donormale",
"cmv", "donorcmv",
"waittime", "disgroup", "fab", "mtx")
# readable names of all baseline variables of interest
nice_names <- c("Age", "Donor Age",
"Sex (Male)", "Donor Sex (Male)",
"CMV", "Donor CMV",
"Wait Time for Transplant", "Disease Group",
"FAB Group", "Prophylactic Use\nof Methotrexate")
# read in the dataset and relabel factor variables to have meaningful levels
bmt <- read_csv(here("data/bmt.csv")) %>%
mutate(across(c(male, cmv, donorcmv, disgroup, donormale, fab, mtx),
as_factor)) %>%
mutate(male = ifelse(male==1, "Male", "Female"),
male = factor(male, levels=c("Female", "Male")),
donormale = ifelse(donormale==1, "Male", "Female"),
donormale = factor(donormale, levels=c("Female", "Male")),
cmv = ifelse(cmv==1, "CMV Positive", "CMV Negative"),
cmv = factor(cmv,
levels=c("CMV Negative", "CMV Positive")),
donorcmv = ifelse(donorcmv==1, "CMV Positive", "CMV Negative"),
donorcmv = factor(donorcmv,
levels=c("CMV Negative", "CMV Positive")),
disgroup = case_when(
disgroup == 1 ~ "Acute Lymphoblastic Leukemia",
disgroup == 2 ~ "Acute Myelocytic Leukemia (Low Risk)",
disgroup == 3 ~ "Acute Myelocytic Leukemia (High Risk)"),
disgroup=factor(disgroup,
levels =c("Acute Lymphoblastic Leukemia",
"Acute Myelocytic Leukemia (Low Risk)",
"Acute Myelocytic Leukemia (High Risk)")),
fab=ifelse(fab==1, "FAB grade 4 or 5 and AML", "Other"),
fab=factor(fab, levels =c("Other",
"FAB grade 4 or 5 and AML")),
mtx = ifelse(mtx==1, "Prophylactic Use of Methotrexate",
"No Prophylactic Use of Methotrexate"),
mtx=factor(mtx, levels=c("No Prophylactic Use of Methotrexate",
"Prophylactic Use of Methotrexate")),
hospital = factor(hospital),
waittime=waittime/365
)
Survival Curves
get_survival_curve <- function(var, nice_name) {
mod <- coxph(as.formula(paste0("Surv(tdfs, deltadfs) ~ ", var)),
data = bmt)
if (is.factor(bmt[[var]])) {
newdat <- tibble(
!!var := unique(bmt[[var]])
)
}
else {
newdat <- tibble(
!!var := round(quantile(bmt[[var]],probs= c(.25,.75), na.rm=TRUE),2))
}
est_by_group <- pmap_df(newdat, ~{
ggsurvfit(survfit(mod, newdata =tibble(!!var := .x)))$data %>%
mutate(strata = .x)}
)
if(!is.factor(bmt[[var]])) {
est_by_group <- est_by_group %>%
mutate(strata=paste0(nice_name, " = ", strata))
}
est_by_group %>%
select(estimate, strata, time, conf.high, conf.low) %>%
dplyr::arrange(strata, time) |>
dplyr::group_by(strata) |>
dplyr::mutate(
xmin = time,
xmax = dplyr::lead(time) # next step’s start
) |>
mutate(strata=gsub(paste0(var,"="),
"",
strata)) %>%
ggplot(aes(x=time,y=estimate,color=strata)) +
geom_step() +
geom_rect(aes(xmin = xmin, xmax = xmax,
ymin = conf.low, ymax = conf.high,
fill=strata),
alpha = 0.15,
inherit.aes=FALSE) +
theme_c(legend.text=element_text(size=16),
plot.title=element_text(size=16,
margin = margin(b = 5))) +
viridis::scale_color_viridis(discrete=TRUE,
option="magma", end=.8, begin=.3) +
viridis::scale_fill_viridis(discrete=TRUE,
option="magma", end=.8, begin=.3) +
labs(y="Disease-Free Survival", x= "Time (Days)",
title=paste0("By ", nice_name), color="", fill="") +
scale_x_continuous(n.breaks=7) +
guides(color=guide_legend(override.aes = list(linewidth=2)))
}
pltlist <- map2(covariates,nice_names,get_survival_curve)
# package for arranging plots
library(patchwork)
# arrange plots into a grid
(pltlist[[1]] + pltlist[[2]])/ (pltlist[[3]] + pltlist[[4]]) / (pltlist[[5]] + pltlist[[6]]) / (pltlist[[7]] + pltlist[[8]])/(pltlist[[9]] + pltlist[[10]]) + plot_annotation(
title = "Survival Curves by Baseline Variables of Interest",
theme=theme_c(plot.title=element_text(face="bold",hjust=.5, size=20)))

# save plot
ggsave(here("figures/surv_by_baseline.jpeg"))
library(gt)
library(gtsummary)
get_model_summary <- function(var, nice_name, model) {
# model <- coxph(as.formula(paste0("Surv(tdfs, deltadfs) ~ ", var)),
# data = bmt)
if (var == "male" | var == "donorcmv" | var == "donormale" ) {
model_summary <- tbl_regression(model, exponentiate=TRUE,
conf.level=.9,
estimate_fun=label_style_sigfig(digits=3),
pvalue_fun = label_style_pvalue(digits = 3),
show_single_row=all_of(var),
label=setNames(
list(nice_name, paste0("tt(", nice_name, ")")),
c(var, paste0("tt(",var, ")"))))
} else {
model_summary <- tbl_regression(model, exponentiate=TRUE,
conf.level=.9,
estimate_fun=label_style_sigfig(digits=3),
pvalue_fun = label_style_pvalue(digits = 3),
label=setNames(
list(nice_name, paste0("tt(", nice_name, ")")),
c(var, paste0("tt(",var, ")"))))
}
return(model_summary)
}
Schoenfeld Residuals
get_model <- function(var) {
model <- coxph(as.formula(paste0("Surv(tdfs, deltadfs) ~ ", var)),
data = bmt)
return(model)
}
model_list <- map(covariates, get_model)
names(model_list) <- covariates
# linear spline basis 1: min(t, c) # basis 2: (t - c)+)
#----------------------------
# plot Schoenfeld residuals
#----------------------------
library(splines)
get_res_plot <- function(var, nice_name, model) {
res <- as.data.frame(cox.zph(model, transform="km")$y)[[var]]
# pval <- cox.zph(model, transform="km")$table[var,"p"] %>%
# signif(3)
#
if(is.null(res)) {return(NULL)}
time <- cox.zph(model, transform="km")$time
df <- tibble(var = var,
time = time,
y=res)
# glimpse(df)
spline_fit <- lm(y ~ ns(time, df = 3), data = df)
ci <- predict(spline_fit, newdata = df,
interval = "confidence",
level = 0.95)
df_ci <- bind_cols(df, as.data.frame(ci))
df_ci %>%
mutate(
spline = predict(spline_fit)
) %>%
ggplot(aes(x=time)) +
geom_point(aes(y=y),alpha=.3) +
geom_line(aes(x=time,y=upr), linetype=2, linewidth=1.05) +
geom_line(aes(x=time,y=lwr), linetype=2, linewidth=1.05) +
geom_line(aes(x=time,y=spline),linewidth=1.05) +
theme_c(plot.title=element_text(face="plain", size=10,hjust=0)) +
labs(title=paste0(
"\nSchoenfeld Residuals for: ", nice_name,
# "\nP-value: ", pval,
"\n"))
}
plotlist <- pmap(list(var=covariates,
nice_name=nice_names,
model=model_list),
get_res_plot)
cowplot::plot_grid(plotlist=plotlist,ncol=2)

#-------------------------------------------------------------
# add time-varying covariates based on residual plots above
#-------------------------------------------------------------
# linear interaction with time
model_list[["donorage"]] <- coxph(
Surv(tdfs, deltadfs) ~ donorage + tt(donorage),
data = bmt,
tt=function(x,t,...) x*t )
# piecewise linear interaction with time (to handle U-shapes)
# before the cutpoint, slope is beta_1
# after the cutpoint, slope is beta_2
# continuous at cutpoint
model_list[["mtx"]] <- coxph(
Surv(tdfs, deltadfs) ~ mtx + tt(mtx),
data = bmt %>% mutate(mtx = ifelse(
mtx=="Prophylactic Use of Methotrexate", 1, 0)),
tt=function(x,t,...) cbind( x * pmin(t, 600), x * pmax(0, t - 600)))
Schoenfeld Residuals (High Leverage Point Excluded)
get_model_exclude_outlier <- function(var) {
model <- coxph(as.formula(paste0("Surv(tdfs, deltadfs) ~ ", var)),
data = bmt %>% filter(id != 69))
return(model)
}
model_list_exclude_outlier <- map(covariates, get_model_exclude_outlier)
names(model_list_exclude_outlier) <- covariates
# linear spline basis 1: min(t, c) # basis 2: (t - c)+)
#----------------------------
# plot Schoenfeld residuals
#----------------------------
library(splines)
plotlist <- pmap(list(var=covariates,
nice_name=nice_names,
model=model_list_exclude_outlier),
get_res_plot)
cowplot::plot_grid(plotlist=plotlist,ncol=2)

Testing Interactions with Time
# MTX
mtx_mod <- coxph(
Surv(tdfs, deltadfs) ~ mtx + tt(mtx),
data = bmt %>% mutate(mtx = ifelse(
mtx=="Prophylactic Use of Methotrexate", 1, 0)),
tt=function(x,t,...) x*t)
tbl_regression(mtx_mod, exponentiate=TRUE, conf.level=.9)
| Characteristic |
HR |
90% CI |
p-value |
| mtx |
3.43 |
1.83, 6.42 |
0.001 |
| tt(mtx) |
1.00 |
0.99, 1.00 |
0.014 |
# FAB
fab_mod <- coxph(
Surv(tdfs, deltadfs) ~ fab + tt(fab),
data = bmt %>% mutate(fab = ifelse(
fab=="Other", 0, 1)),
tt=function(x,t,...) x*t)
tbl_regression(fab_mod, exponentiate=TRUE, conf.level=.9)
| Characteristic |
HR |
90% CI |
p-value |
| fab |
1.72 |
1.05, 2.81 |
0.072 |
| tt(fab) |
1.00 |
1.00, 1.00 |
0.6 |
# cmv
cmv_mod <- coxph(
Surv(tdfs, deltadfs) ~ cmv + tt(cmv),
data = bmt %>% mutate(cmv = ifelse(
cmv=="CMV Positive", 1, 0)),
tt=function(x,t,...) x*t)
tbl_regression(cmv_mod, exponentiate=TRUE, conf.level=.9)
| Characteristic |
HR |
90% CI |
p-value |
| cmv |
1.30 |
0.79, 2.16 |
0.4 |
| tt(cmv) |
1.00 |
1.00, 1.00 |
0.6 |
# donor cmv
cmv_mod <- coxph(
Surv(tdfs, deltadfs) ~ donorcmv + tt(donorcmv),
data = bmt %>% mutate(donorcmv = ifelse(
donorcmv=="CMV Positive", 1, 0)),
tt=function(x,t,...) x*t)
tbl_regression(cmv_mod, exponentiate=TRUE, conf.level=.9)
| Characteristic |
HR |
90% CI |
p-value |
| donorcmv |
1.17 |
0.71, 1.95 |
0.6 |
| tt(donorcmv) |
1.00 |
1.00, 1.00 |
0.6 |
# sex
sex_mod <- coxph(
Surv(tdfs, deltadfs) ~ male + tt(male),
data = bmt %>% mutate(male = ifelse(
male=="Male", 1, 0)),
tt=function(x,t,...) x*t)
tbl_regression(sex_mod, exponentiate=TRUE, conf.level=.9)
| Characteristic |
HR |
90% CI |
p-value |
| male |
0.73 |
0.44, 1.21 |
0.3 |
| tt(male) |
1.00 |
1.00, 1.00 |
0.7 |
# donor sex
sex_mod <- coxph(
Surv(tdfs, deltadfs) ~ donormale + tt(donormale),
data = bmt %>% mutate(donormale = ifelse(
male=="Male", 1, 0)),
tt=function(x,t,...) x*t)
tbl_regression(sex_mod, exponentiate=TRUE, conf.level=.9)
| Characteristic |
HR |
90% CI |
p-value |
| donormale |
0.73 |
0.44, 1.21 |
0.3 |
| tt(donormale) |
1.00 |
1.00, 1.00 |
0.7 |
# donor age
donorsex_mod <- coxph(
Surv(tdfs, deltadfs) ~ donorage + tt(donorage),
data = bmt,
tt=function(x,t,...) x*t)
tbl_regression(donorsex_mod, exponentiate=TRUE, conf.level=.9)
| Characteristic |
HR |
90% CI |
p-value |
| donorage |
1.03 |
1.00, 1.06 |
0.063 |
| tt(donorage) |
1.00 |
1.00, 1.00 |
0.14 |
# disease group
disgroup_mod <- coxph(
Surv(tdfs, deltadfs) ~ is_low + is_high+ tt(is_high),
data = bmt %>%
mutate(is_low = ifelse(disgroup=="Acute Myelocytic Leukemia (Low Risk)",
1, 0),
is_high = ifelse(disgroup=="Acute Myelocytic Leukemia (High Risk)",
1, 0)),
tt=function(x,t,...) x*t )
tbl_regression(disgroup_mod, exponentiate=TRUE, conf.level=.9,
label=list(is_low="Acute Myelocytic Leukemia (Low Risk)",
is_high="Acute Myelocytic Leukemia (High Risk)",
`tt(is_high)`="tt(Acute Myelocytic Leukemia (High Risk))"))
| Characteristic |
HR |
90% CI |
p-value |
| Acute Myelocytic Leukemia (Low Risk) |
0.53 |
0.33, 0.86 |
0.031 |
| Acute Myelocytic Leukemia (High Risk) |
2.32 |
1.28, 4.21 |
0.020 |
| tt(Acute Myelocytic Leukemia (High Risk)) |
1.00 |
1.00, 1.00 |
0.079 |
#-------------------------------------------------------------
# add time-varying covariates based on residual plots above
#-------------------------------------------------------------
# piecewise linear interaction with time (to handle U-shapes)
# before the cutpoint, slope is beta_1
# after the cutpoint, slope is beta_2
# continuous at cutpoint
# model_list[["mtx"]] <- coxph(
# Surv(tdfs, deltadfs) ~ mtx + tt(mtx),
# data = bmt %>% mutate(mtx = ifelse(
# mtx=="Prophylactic Use of Methotrexate", 1, 0)),
# tt=function(x,t,...) cbind( x * pmin(t, 600), x * pmax(0, t - 600)))
model_list[["mtx"]] <- coxph(
Surv(tdfs, deltadfs) ~ mtx + tt(mtx),
data = bmt %>% mutate(mtx = ifelse(
mtx=="Prophylactic Use of Methotrexate", 1, 0)),
tt=function(x,t,...) x*t)
model_list[["disgroup"]] <- coxph(
Surv(tdfs, deltadfs) ~ is_low + is_high+ tt(is_high),
data = bmt %>%
mutate(is_low = ifelse(disgroup=="Acute Myelocytic Leukemia (Low Risk)",
1, 0),
is_high = ifelse(disgroup=="Acute Myelocytic Leukemia (High Risk)",
1, 0)),
tt=function(x,t,...) x*t )
Table
#---------------------------------------------------------------------------
# create combined table with HR's for each univariable model
#---------------------------------------------------------------------------
model_summary_list <- pmap(list(var=covariates,
nice_name=nice_names,
model=model_list),
get_model_summary)
tbl_stack(model_summary_list,
group_header=paste0('Model: ', nice_names)) %>%
as_gt() %>%
gt::tab_style(
style = cell_text(weight = "bold"),
locations = cells_row_groups()
)
| Characteristic |
HR |
90% CI |
p-value |
| Model: Age |
| Age |
1.01 |
0.992, 1.03 |
0.338 |
| Model: Donor Age |
| Donor Age |
1.01 |
0.994, 1.04 |
0.252 |
| Model: Sex (Male) |
| Sex (Male) |
0.795 |
0.552, 1.15 |
0.301 |
| Model: Donor Sex (Male) |
| Donor Sex (Male) |
0.991 |
0.681, 1.44 |
0.970 |
| Model: CMV |
| CMV |
|
|
|
| Â Â Â Â CMV Negative |
— |
— |
|
| Â Â Â Â CMV Positive |
1.17 |
0.813, 1.68 |
0.482 |
| Model: Donor CMV |
| Donor CMV |
1.05 |
0.726, 1.51 |
0.836 |
| Model: Wait Time for Transplant |
| Wait Time for Transplant |
0.970 |
0.806, 1.17 |
0.791 |
| Model: Disease Group |
| is_low |
0.534 |
0.331, 0.860 |
0.031 |
| is_high |
2.32 |
1.28, 4.21 |
0.020 |
| tt(is_high) |
0.998 |
0.996, 1.00 |
0.079 |
| Model: FAB Group |
| FAB Group |
|
|
|
| Â Â Â Â Other |
— |
— |
|
| Â Â Â Â FAB grade 4 or 5 and AML |
1.89 |
1.31, 2.73 |
0.004 |
| Model: Prophylactic Use
of Methotrexate |
| Prophylactic Use
of Methotrexate |
3.43 |
1.83, 6.42 |
0.001 |
| tt(Prophylactic Use
of Methotrexate) |
0.996 |
0.994, 0.999 |
0.014 |
# Fit model with continuous covariate
fit <- coxph(Surv(tdfs, deltadfs) ~ donorage, data = bmt)
# Choose representative values (e.g., quartiles)
quantiles <- quantile(bmt$donorage, probs = c(.1, .5, .9))
newdat <- data.frame(donorage = quantiles)
# Generate survival predictions
sf <- survfit(fit, newdata = newdat)
sf$surv %>%
as_tibble() %>%
mutate(time = sf$time) %>%
pivot_longer(-c(time)) %>%
ggplot(aes(x=time, y=value, color=name)) +
geom_step()
tibble(time=sf$time,
surv = sf$surv) %>%
ggplot(aes(x=time, y=surv)) +
geom_step()
plot(sf)
ggsurvfit(sf)
Directive 6
To assess the association between prophylactic use of methotrexate
and the risk of developing aGVHD, we estimated survival curves using an
adjusted Cox proportional hazards model, adjusting for the confounders
age, wait time until transplant, donor age, sex mismatch, CMV mismatch,
and disease group. Sex mismatch was encoded as a 3-level factor:
donor-recipient sex match; donor male, recipient female; and donor
female, recipient male. A sex match was taken to be the reference
category. CMV mismatch was encoded as a 4 level factor: CMV negative in
both donor and recipient; CMV positive in donor, negative in recipient;
CMV negative in donor, positive in recipient; and CMV positive in both
donor and recipient. While death is a competing event for the
development of aGVHD, this was not a problem in our sample because no
patient died before developing aGVHD, and hence we did not need to
explicitly handle competing events for this analysis.
We used the g-computation formula to calculate treatment specific
survival curves, and used bootstrap confidence intervals to estimate the
difference in event-free survival probabilities at a subset of
clinically relevant time points, 14 days, 30, days, 60 days, and 90
days.
The confounders were selected based on clinical reasoning. That is,
variables that could plausibly be related to both treatment and the
development of aGVHD were considered. Several of the baseline variables
can be considered proxies for underlying disease severity, which may
affect both whether a patient was prioritized for prophylactic use of
methotrexate – for example, via a clinician’s guidance to seek treatment
at a center where prophylactic use of methotrexate was the protocol –
and a patient’s development of aGVHD. While wait time could conceivably
be related to both the treatment and development of aGVHD, a short wait
time could also be reflective of a patient’s obtaining a transplant from
a family member or friend, and as such, we concluded wait time was a
less useful proxy for underlying disease severity.
bmt <- bmt %>%
mutate(sex_mismatch = paste0(donormale, "->", male),
sex_mismatch = ifelse(sex_mismatch == "Male->Male",
"Match", sex_mismatch),
sex_mismatch = ifelse(sex_mismatch == "Female->Female",
"Match", sex_mismatch),
sex_mismatch = factor(
sex_mismatch,
levels=c("Match", "Male->Female","Female->Male")),
cmv_mismatch = paste0(donorcmv, "->", cmv),
cmv_mismatch = factor(
cmv_mismatch,
levels=c("CMV Negative->CMV Negative",
"CMV Negative->CMV Positive",
"CMV Positive->CMV Negative",
"CMV Positive->CMV Positive")))
Perfect Collinearity by Hospital
# perfect collinearity
table(bmt$hospital, bmt$mtx)
##
## No Prophylactic Use of Methotrexate Prophylactic Use of Methotrexate
## 1 76 0
## 2 0 17
## 3 0 23
## 4 21 0
No Patient Dies Before aGVHD
bmt %>%
filter(ts < ta) %>%
nrow()
## [1] 0
Including FAB
model <- coxph(Surv(ta, deltaa) ~
mtx + age + donorage +
# waittime +
fab + disgroup+
sex_mismatch +
cmv_mismatch, data=bmt
)
Excluding FAB
model <- coxph(Surv(ta, deltaa) ~
mtx + age + donorage +
# waittime + fab +
disgroup+
sex_mismatch +
cmv_mismatch,
data=bmt
)
Proportional Hazards Assumption
cox.zph(model, transform="km")$table %>%
as.data.frame() %>%
kbl(caption="Proportional Hazards Test") %>%
kable_paper()
Proportional Hazards Test
|
|
chisq
|
df
|
p
|
|
mtx
|
0.1608288
|
1
|
0.6883946
|
|
age
|
1.1104426
|
1
|
0.2919858
|
|
donorage
|
1.2587468
|
1
|
0.2618884
|
|
disgroup
|
4.6988143
|
2
|
0.0954257
|
|
sex_mismatch
|
2.9001290
|
2
|
0.2345552
|
|
cmv_mismatch
|
3.0752872
|
3
|
0.3801624
|
|
GLOBAL
|
10.7956582
|
10
|
0.3736583
|
mod_resid <- as.data.frame(cox.zph(model, transform="km")$y) %>%
as_tibble()
time <- cox.zph(model, transform="km")$time
df <- mod_resid %>%
bind_cols(time=time)
df %>%
pivot_longer(-time) %>%
ggplot(aes(x=time,y=value)) +
geom_point() +
geom_smooth(formula=y~x,method="loess",span=5) +
facet_wrap(~name, scales="free_y") +
theme_c()

mod_resid <- as.data.frame(cox.zph(model, transform="km")$y) %>%
as_tibble()
time <- cox.zph(model, transform="log")$time
df <- mod_resid %>%
bind_cols(time=time)
df %>%
pivot_longer(-time) %>%
ggplot(aes(x=time,y=value)) +
geom_point() +
geom_smooth(formula=y~x,method="loess",span=5) +
facet_wrap(~name, scales="free_y") +
theme_c()
df %>%
pivot_longer(-time) %>%
ggplot(aes(x=time,y=value)) +
geom_point() +
geom_smooth(formula=y~x,method="loess",span=5) +
facet_wrap(~name, scales="free_y") +
theme_c()
model <- coxph(Surv(ta, deltaa) ~
mtx + age + donorage +
tt(waittime) +
fab + disgroup+
sex_mismatch +
cmv_mismatch, data=bmt,
tt=function(x,t,...) x*splines::ns(t,df=2))
G-Computation
#------------------------------------------------------------
# g-computation to compute marginalized survival curves
#------------------------------------------------------------
surv_treat <- survfit(model, newdata=bmt %>%
mutate(mtx="Prophylactic Use of Methotrexate")) %>%
broom::tidy() %>%
rowwise() %>%
mutate(
mean_surv = mean(c_across(contains("estimate")), na.rm = TRUE)
) %>%
ungroup() %>%
select(time, mean_surv)
surv_no_treat <- survfit(model, newdata=bmt %>%
mutate(mtx="No Prophylactic Use of Methotrexate")) %>%
broom::tidy() %>%
rowwise() %>%
mutate(
mean_surv = mean(c_across(contains("estimate")), na.rm = TRUE)
) %>%
ungroup() %>%
select(time, mean_surv)
surv_est_dir6 <- surv_treat %>%
mutate(treatment="Prophylactic Use of Methotrexate") %>%
bind_rows(surv_no_treat %>%
mutate(treatment="No Prophylactic Use of Methotrexate"))
surv_est_dir6 %>%
ggplot(aes(x=time, y=mean_surv, color=treatment))+
geom_step() +
coord_cartesian(xlim=c(0,150)) +
theme_c(axis.title=element_text(size=11),
legend.text=element_text(size=11),
legend.title=element_text(size=13)) +
viridis::scale_color_viridis(
discrete=TRUE, option="mako", end=.8, begin=.2) +
labs(x="Time (Days)", y = "Event-Free Survival")

ggsave(here("figures/directive_6.jpeg"))
# cox.zph(model)
broom::tidy(model,
exponentiate=TRUE,
conf.int=TRUE) %>%
mutate(term=gsub(
"sex_mismatch|disgroup|cmv_mismatch|fab|mtx",
"",term)) %>%
mutate(term=factor(term),
term=fct_reorder(term,estimate)) %>%
ggplot(aes(y=term, xmin=conf.low,xmax=conf.high))+
geom_point(aes(x=estimate), alpha=.4, size=1.1) +
geom_errorbar(width=.2) +
geom_vline(aes(xintercept=1),
color='darkred',
linetype=2) +
theme_c(axis.text.y=element_text(size=12, hjust=1)) +
labs(y="", x= "Hazard Ratio")

gtsummary::tbl_regression(model,conf.level=.9,exponentiate=TRUE,
pvalue_fun = label_style_pvalue(digits = 3))
| Characteristic |
HR |
90% CI |
p-value |
| mtx |
|
|
|
| Â Â Â Â No Prophylactic Use of Methotrexate |
— |
— |
|
| Â Â Â Â Prophylactic Use of Methotrexate |
0.57 |
0.25, 1.31 |
0.270 |
| age |
1.05 |
1.00, 1.11 |
0.125 |
| donorage |
1.03 |
0.98, 1.09 |
0.373 |
| disgroup |
|
|
|
| Â Â Â Â Acute Lymphoblastic Leukemia |
— |
— |
|
| Â Â Â Â Acute Myelocytic Leukemia (Low Risk) |
0.70 |
0.30, 1.60 |
0.477 |
| Â Â Â Â Acute Myelocytic Leukemia (High Risk) |
0.34 |
0.13, 0.86 |
0.057 |
| sex_mismatch |
|
|
|
| Â Â Â Â Match |
— |
— |
|
| Â Â Â Â Male->Female |
1.09 |
0.47, 2.51 |
0.866 |
| Â Â Â Â Female->Male |
1.65 |
0.69, 3.91 |
0.341 |
| cmv_mismatch |
|
|
|
| Â Â Â Â CMV Negative->CMV Negative |
— |
— |
|
| Â Â Â Â CMV Negative->CMV Positive |
0.57 |
0.18, 1.86 |
0.437 |
| Â Â Â Â CMV Positive->CMV Negative |
1.71 |
0.66, 4.49 |
0.356 |
| Â Â Â Â CMV Positive->CMV Positive |
1.47 |
0.64, 3.38 |
0.442 |
# ggsave(here("directive_6.jpeg"))
#--------------------------------------------------------
# obtain bootstrapped differences in survival curves
#--------------------------------------------------------
get_marginal_surv <- function(dat,
input_times = c(14, 30, 60, 90)) {
model <- coxph(Surv(ta, deltaa) ~
mtx + age + donorage +
waittime +
fab + disgroup+
sex_mismatch +
cmv_mismatch, data=dat
)
surv_treat <- survfit(model, newdata=dat %>%
mutate(mtx="Prophylactic Use of Methotrexate"))
surv_treat_df <- tibble(time = input_times) %>%
bind_cols(summary(surv_treat, times=input_times)$surv) %>%
rowwise() %>%
mutate(
mean_surv = mean(c_across(!matches("time")), na.rm = TRUE)
) %>%
select(time,surv_treat=mean_surv)
surv_no_treat <- survfit(model, newdata=dat %>%
mutate(mtx="No Prophylactic Use of Methotrexate"))
surv_no_treat_df <- tibble(time = input_times) %>%
bind_cols(summary(surv_no_treat, times=input_times)$surv) %>%
rowwise() %>%
mutate(
mean_surv = mean(c_across(!matches("time")), na.rm = TRUE)
) %>%
select(time,surv_no_treat=mean_surv)
surv_no_treat_df %>%
left_join(surv_treat_df) %>%
mutate(difference=surv_treat-surv_no_treat)
}
boot_surv <- map_df(1:5000, ~ {
ind <- sample(1:nrow(bmt), replace=TRUE)
boot_samp <- bmt[ind,]
get_marginal_surv(boot_samp)
})
saveRDS(boot_surv, here('data/boot_surv_directive_6.RDS'))
est <- surv_est_dir6 %>%
pivot_wider(names_from=treatment, values_from=mean_surv) %>%
mutate(difference=`Prophylactic Use of Methotrexate`-`No Prophylactic Use of Methotrexate`) %>%
filter(time == 10 | time == 30 | time == 53 |
time== 88) %>%
pull(difference)
boot_surv <- readRDS(here('data/boot_surv_directive_6.RDS'))
boot_surv %>%
group_by(time) %>%
summarize(lower = quantile(difference,.05),
upper=quantile(difference,.95)) %>%
bind_cols(estimate=est) %>%
mutate(
time_lab = paste0(time, " Days"),
time_lab=factor(time_lab),
time_lab=fct_reorder(time_lab,time)) %>%
ggplot(aes(y=time_lab, xmin=lower,xmax=upper)) +
geom_point(aes(y=time_lab,x=estimate))+
geom_errorbar(width=.3) +
theme_c(axis.title.x=element_text(size=16),
axis.text.y=element_text(size=14)) +
geom_vline(aes(xintercept=0), color="darkred", linetype=2) +
labs(x = latex2exp::TeX("$\\hat{S}_1(t) - \\hat{S}_0(t)$"),
y="")

library(kableExtra)
boot_surv %>%
group_by(time) %>%
summarize(lower = quantile(difference,.05),
upper=quantile(difference,.95)) %>%
mutate(across(where(is.numeric), ~round(.x,4))) %>%
kbl(caption="Bootstrap Confidence Intervals for $\\hat{S}_1(t) - \\hat{S}_0(t)$") %>%
kable_paper()
Bootstrap Confidence Intervals for \(\hat{S}_1(t) - \hat{S}_0(t)\)
|
time
|
lower
|
upper
|
|
14
|
-0.0095
|
0.0184
|
|
30
|
-0.0434
|
0.1451
|
|
60
|
-0.0585
|
0.1819
|
|
90
|
-0.0640
|
0.2117
|
Directive 7
Based on the available data, is recovery of normal platelet levels
associated with improved disease-free survival? Is it associated with a
decreased risk of relapse?
To assess whether the recovery of normal platelet levels associated
with disease-free survival, we fit a Cox proportional hazards model with
the recovery of platelet levels as a time-varying covariate, adjusting
for the confounders age, wait time until transplant, donor age,
donor-recipient sex mismatch, donor-recipient CMV mismatch, disease
group, and prophylactic use of methotrexate. While death could act as a
competing event, no patient in our sample died before their return to
normal platelet levels, so it was not necessary to handle competing
events for this analysis.
Is recovery of normal platelet levels associated with improved
disease-free survival?
Fully Adjusted Model
# Age, fab classification, wait time until transplant, donor age, donor sex, cmv, sex, donor cmv, disease group
dat <- bmt %>%
select(age,fab, waittime, disgroup,
deltap, tp, deltadfs, sex_mismatch,
cmv_mismatch,donorage,mtx,
tdfs) %>%
mutate(id=row_number())
dat <- tmerge(dat, dat, id=id,endpt=event(tdfs,deltadfs))
dat <- tmerge(dat, dat, id=id, plat=tdc(tp, deltap,init=0))
model <- coxph( Surv(tstart, tstop, endpt) ~ age +
#waittime + fab +
cmv_mismatch + disgroup + plat +donorage + sex_mismatch +
mtx, data=dat)
gtsummary::tbl_regression(model,
label=list(plat="Recovery of Normal Platelet Levels",
age="Age",
fab="FAB Classification",
male="Sex",
disgroup="Disease Group",
cmv="Patient CMV",
donorage="Donor Age",
sex_mismatch="Sex Mismatch",
mtx="Treatment" #,
# waittime="Wait Time"
),
exponentiate=TRUE,
conf.level=.9,
pvalue_fun = label_style_pvalue(digits = 3))
| Characteristic |
HR |
90% CI |
p-value |
| Age |
1.00 |
0.97, 1.04 |
0.803 |
| cmv_mismatch |
|
|
|
| Â Â Â Â CMV Negative->CMV Negative |
— |
— |
|
| Â Â Â Â CMV Negative->CMV Positive |
1.10 |
0.65, 1.86 |
0.769 |
| Â Â Â Â CMV Positive->CMV Negative |
1.00 |
0.55, 1.83 |
0.990 |
| Â Â Â Â CMV Positive->CMV Positive |
0.84 |
0.51, 1.37 |
0.554 |
| Disease Group |
|
|
|
| Â Â Â Â Acute Lymphoblastic Leukemia |
— |
— |
|
| Â Â Â Â Acute Myelocytic Leukemia (Low Risk) |
0.60 |
0.35, 1.02 |
0.115 |
| Â Â Â Â Acute Myelocytic Leukemia (High Risk) |
1.38 |
0.84, 2.28 |
0.288 |
| Recovery of Normal Platelet Levels |
0.32 |
0.18, 0.56 |
<0.001 |
| Donor Age |
1.01 |
0.98, 1.04 |
0.646 |
| Sex Mismatch |
|
|
|
| Â Â Â Â Match |
— |
— |
|
| Â Â Â Â Male->Female |
1.38 |
0.88, 2.15 |
0.236 |
| Â Â Â Â Female->Male |
1.12 |
0.67, 1.87 |
0.705 |
| Treatment |
|
|
|
| Â Â Â Â No Prophylactic Use of Methotrexate |
— |
— |
|
| Â Â Â Â Prophylactic Use of Methotrexate |
1.19 |
0.76, 1.86 |
0.526 |
Marginal Model
#-----------------------
# marginal model
#-----------------------
model <- coxph( Surv(tstart, tstop, endpt) ~plat, data=dat)
gtsummary::tbl_regression(model,
label=list(plat="Platelet Count",
age="Age",
fab="FAB Classification",
male="Sex",
disgroup="Disease Group",
cmv="Patient CMV"#,
# waittime="Wait Time"
),
exponentiate=TRUE,
conf.level=.9,
pvalue_fun = label_style_pvalue(digits = 3))
| Characteristic |
HR |
90% CI |
p-value |
| Platelet Count |
0.28 |
0.16, 0.48 |
<0.001 |
Is recovery of normal platelet levels associated with relapse?
To evaluate whether there was an association between the recovery of
normal platelet levels and the risk of relapse, we did need to handle
the fact that death is a competing event. To do so, we used a
semiparametric proportional sub-distribution odds model, which was fit
with the implementation provided by the timereg R package.
dat <- bmt %>%
select(age,fab, waittime, disgroup,
deltap, tp, deltadfs, deltar,
deltas, sex_mismatch,cmv_mismatch,donorage,
mtx,
tdfs) %>%
mutate(id=row_number())
dat <- tmerge(dat, dat, id=id,endpt=event(tdfs,deltadfs))
dat <- tmerge(dat, dat, id=id, plat=tdc(tp, deltap,init=0))
library(timereg)
dat <- dat %>%
mutate(cause = case_when(
deltas == 1 & deltar == 0 ~ 1,
deltar == 1 ~ 2,
deltar == 0 & deltas == 0 ~ 0
),
cause=as.integer(cause))
bmt_copy <- bmt
odds_mod <- prop.odds.subdist(Event(tdfs, cause) ~ plat,
cause = 2,
data=dat)
odds_mod <- prop.odds.subdist(Event(tdfs, cause) ~
age + # fab+ waittime+# fab +# waittime +
cmv_mismatch + disgroup +donorage + sex_mismatch +
plat + mtx,
cause = 2,
data=dat)
coef(odds_mod) %>% as.data.frame() %>%
as_tibble(rownames="variable") %>%
mutate(across(where(is.numeric), ~round(.x,4))) %>%
kbl() %>%
kable_paper()
|
variable
|
Coef.
|
SE
|
Robust SE
|
D2log(L)^-1
|
z
|
P-val
|
lower2.5%
|
upper97.5%
|
|
age
|
0.0147
|
0.0207
|
0.0244
|
0.0246
|
0.605
|
0.5450
|
-0.0259
|
0.0553
|
|
cmv_mismatchCMV Negative->CMV Positive
|
0.4790
|
0.3930
|
0.4010
|
0.3890
|
1.190
|
0.2320
|
-0.2910
|
1.2500
|
|
cmv_mismatchCMV Positive->CMV Negative
|
-0.1540
|
0.4720
|
0.4910
|
0.4750
|
-0.313
|
0.7540
|
-1.0800
|
0.7710
|
|
cmv_mismatchCMV Positive->CMV Positive
|
0.2600
|
0.3660
|
0.3820
|
0.3730
|
0.679
|
0.4970
|
-0.4570
|
0.9770
|
|
disgroupAcute Myelocytic Leukemia (Low Risk)
|
-1.1800
|
0.3960
|
0.3870
|
0.3910
|
-3.060
|
0.0022
|
-1.9600
|
-0.4040
|
|
disgroupAcute Myelocytic Leukemia (High Risk)
|
0.4700
|
0.3710
|
0.3710
|
0.3610
|
1.270
|
0.2050
|
-0.2570
|
1.2000
|
|
donorage
|
-0.0284
|
0.0216
|
0.0242
|
0.0223
|
-1.170
|
0.2410
|
-0.0707
|
0.0139
|
|
sex_mismatchMale->Female
|
0.4090
|
0.3230
|
0.3290
|
0.3230
|
1.240
|
0.2140
|
-0.2240
|
1.0400
|
|
sex_mismatchFemale->Male
|
-0.5510
|
0.4130
|
0.4270
|
0.4000
|
-1.290
|
0.1970
|
-1.3600
|
0.2580
|
|
plat
|
0.0620
|
0.2740
|
0.2740
|
0.2720
|
0.226
|
0.8210
|
-0.4750
|
0.5990
|
|
mtxProphylactic Use of Methotrexate
|
0.1610
|
0.2870
|
0.3010
|
0.3230
|
0.534
|
0.5940
|
-0.4020
|
0.7240
|
coef(odds_mod) %>% as.data.frame() %>%
as_tibble(rownames="variable") %>%
rename(coef=`Coef.`) %>%
mutate(lower = coef + qnorm(.05)*SE,
upper= coef+ qnorm(.95)*SE) %>%
mutate(across(c(coef,lower,upper),exp)) %>%
mutate(variable=gsub(
"sex_mismatch|disgroup|cmv_mismatch|fab|mtx",
"",variable),
variable=gsub("plat", "Recovery of Normal Platelet Levels", variable),
variable=gsub("waittime", "Wait Time for Transplant", variable),
variable=gsub("age", "Age", variable),
variable=gsub("donorAge", "Donor Age", variable)) %>%
# mutate(across(c(`Coef.`, `lower2.5%`,`upper97.5%`), exp)) %>%
ggplot(aes(y=fct_reorder(variable,coef),
xmin=lower,
xmax=upper)) +
geom_errorbar(width=.2) +
geom_point(aes(x=coef),size=.55) +
geom_vline(xintercept=1,linetype=2,color="darkred") +
theme_c(axis.text.y=element_text(size=11, hjust=1),
axis.title.x=element_text(size=11.5)) +
labs(y="", x="Odds Ratio")

ggsave(here("figures/directive_7.jpeg"))
coef(odds_mod) %>% as.data.frame() %>%
as_tibble(rownames="variable") %>%
rename(coef=`Coef.`) %>%
mutate(lower = coef + qnorm(.05)*SE,
upper= coef+ qnorm(.95)*SE,
lower=exp(lower),
upper=exp(upper),
coef=round(exp(coef),3),
CI= paste0(round(lower,3), ", ", round(upper, 3)),
`P-val`= round(`P-val`,3)) %>%
select(Characteristic=variable, OR=coef, `90% CI`=CI,
`p-value`=`P-val`) %>%
mutate(Characteristic = gsub(
"cmv_mismatch|disgroup|sex_mismatch", "",
Characteristic),
Characteristic=str_to_title(Characteristic),
Characteristic=gsub("Cmv", "CMV", Characteristic),
Characteristic=gsub("Donorage", "Donor Age", Characteristic),
Characteristic=gsub("Plat",
"Recovery of Normal Platelet Levels",
Characteristic)) %>%
kbl() %>%
kable_styling(bootstrap_options="striped")
|
Characteristic
|
OR
|
90% CI
|
p-value
|
|
Age
|
1.015
|
0.981, 1.05
|
0.545
|
|
CMV Negative->CMV Positive
|
1.614
|
0.846, 3.082
|
0.232
|
|
CMV Positive->CMV Negative
|
0.857
|
0.394, 1.863
|
0.754
|
|
CMV Positive->CMV Positive
|
1.297
|
0.71, 2.368
|
0.497
|
|
Acute Myelocytic Leukemia (Low Risk)
|
0.307
|
0.16, 0.589
|
0.002
|
|
Acute Myelocytic Leukemia (High Risk)
|
1.600
|
0.869, 2.945
|
0.205
|
|
Donor Age
|
0.972
|
0.938, 1.007
|
0.241
|
|
Male->Female
|
1.505
|
0.885, 2.561
|
0.214
|
|
Female->Male
|
0.576
|
0.292, 1.137
|
0.197
|
|
Recovery of Normal Platelet Levels
|
1.064
|
0.678, 1.67
|
0.821
|
|
Mtxprophylactic Use Of Methotrexate
|
1.175
|
0.733, 1.883
|
0.594
|
Directive 4
dat <- bmt %>%
select(age,fab, waittime, disgroup,
deltap, tp, deltadfs, deltar,
deltas, sex_mismatch,cmv_mismatch,donorage,
mtx,
tdfs,ta,deltaa) %>%
mutate(id=row_number())
dat <- tmerge(dat, dat, id=id,endpt=event(tdfs,deltadfs))
dat <- tmerge(dat, dat, id=id, agvhd=tdc(ta, deltaa,init=0))
library(timereg)
dat <- dat %>%
mutate(cause = case_when(
# death
deltas == 1 & deltar == 0 ~ 1,
# relapse
deltar == 1 ~ 2,
deltar == 0 & deltas == 0 ~ 0
),
cause=as.integer(cause))
# marginal association
odds_mod_marg <- prop.odds.subdist(Event(tdfs, cause) ~agvhd ,
cause = 2,
data=dat)
summary(odds_mod_marg)
## Proportional Odds model
##
## Test for baseline
## Test for nonparametric terms
##
## Test for non-significant effects
## Supremum-test of significance p-value H_0: B(t)=0
## Baseline 5.52 0
##
## Test for time invariant effects
## Kolmogorov-Smirnov test p-value H_0:constant effect
## Baseline 0.113 0
##
## Covariate effects
## Coef. SE Robust SE D2log(L)^-1 z P-val lower2.5% upper97.5%
## agvhd -0.596 0.525 0.525 0.524 -1.13 0.256 -1.62 0.433
## Test of Goodness-of-fit
## sup| hat U(t) | p-value H_0
## agvhd 1.07 0.63
odds_mod <- prop.odds.subdist(Event(tdfs, cause) ~
age + # fab+ waittime+# fab +# waittime +
cmv_mismatch + disgroup +donorage + sex_mismatch +
agvhd + mtx,
cause = 2,
data=dat)
summary(odds_mod)
## Proportional Odds model
##
## Test for baseline
## Test for nonparametric terms
##
## Test for non-significant effects
## Supremum-test of significance p-value H_0: B(t)=0
## Baseline 1.42 0.396
##
## Test for time invariant effects
## Kolmogorov-Smirnov test p-value H_0:constant effect
## Baseline 0.144 0.236
##
## Covariate effects
## Coef. SE Robust SE
## age -0.01150 0.0259 0.0291
## cmv_mismatchCMV Negative->CMV Positive 0.76200 0.5200 0.5280
## cmv_mismatchCMV Positive->CMV Negative 0.34600 0.5890 0.6220
## cmv_mismatchCMV Positive->CMV Positive 0.47200 0.4740 0.4930
## disgroupAcute Myelocytic Leukemia (Low Risk) -1.13000 0.5190 0.5110
## disgroupAcute Myelocytic Leukemia (High Risk) 0.59500 0.4620 0.4690
## donorage -0.00596 0.0281 0.0302
## sex_mismatchMale->Female 0.17300 0.4260 0.4270
## sex_mismatchFemale->Male -0.84600 0.5400 0.5540
## agvhd -0.48600 0.5510 0.5320
## mtxProphylactic Use of Methotrexate 0.02270 0.3670 0.3870
## D2log(L)^-1 z P-val
## age 0.0304 -0.3950 0.6930
## cmv_mismatchCMV Negative->CMV Positive 0.5100 1.4400 0.1490
## cmv_mismatchCMV Positive->CMV Negative 0.5890 0.5550 0.5790
## cmv_mismatchCMV Positive->CMV Positive 0.4910 0.9580 0.3380
## disgroupAcute Myelocytic Leukemia (Low Risk) 0.5120 -2.2100 0.0272
## disgroupAcute Myelocytic Leukemia (High Risk) 0.4660 1.2700 0.2050
## donorage 0.0281 -0.1970 0.8440
## sex_mismatchMale->Female 0.4270 0.4060 0.6850
## sex_mismatchFemale->Male 0.5340 -1.5300 0.1270
## agvhd 0.5490 -0.9140 0.3610
## mtxProphylactic Use of Methotrexate 0.4210 0.0588 0.9530
## lower2.5% upper97.5%
## age -0.0623 0.0393
## cmv_mismatchCMV Negative->CMV Positive -0.2570 1.7800
## cmv_mismatchCMV Positive->CMV Negative -0.8080 1.5000
## cmv_mismatchCMV Positive->CMV Positive -0.4570 1.4000
## disgroupAcute Myelocytic Leukemia (Low Risk) -2.1500 -0.1130
## disgroupAcute Myelocytic Leukemia (High Risk) -0.3110 1.5000
## donorage -0.0610 0.0491
## sex_mismatchMale->Female -0.6620 1.0100
## sex_mismatchFemale->Male -1.9000 0.2120
## agvhd -1.5700 0.5940
## mtxProphylactic Use of Methotrexate -0.6970 0.7420
## Test of Goodness-of-fit
## sup| hat U(t) | p-value H_0
## age 30.400 0.718
## cmv_mismatchCMV Negative->CMV Positive 1.390 0.704
## cmv_mismatchCMV Positive->CMV Negative 1.370 0.486
## cmv_mismatchCMV Positive->CMV Positive 1.610 0.678
## disgroupAcute Myelocytic Leukemia (Low Risk) 3.930 0.008
## disgroupAcute Myelocytic Leukemia (High Risk) 3.130 0.042
## donorage 44.400 0.496
## sex_mismatchMale->Female 1.440 0.778
## sex_mismatchFemale->Male 1.470 0.394
## agvhd 0.992 0.672
## mtxProphylactic Use of Methotrexate 3.300 0.030